# R script for making final datasets for replicating all analysis
# in 'When Parties Move to the Middle: The Role of Uncertainty'
# by J. Lindvall, D. Rueda, and H. Zhai
# this file written by: H Zhai (2022-11-03 [updated: -])
# on device: Mac Pro 13 Dual-Core Intel Core i5 2.3 GHz 

# PLEASE MAKE SURE ALL THE REPLICATION FILES (DATA AND SCRIPTS) ARE STORED AT THE SAME LEVEL IN THE SAME DIRECTORY,
# OR ADJUST THE FILE PATHS IN THE CODE ACCORDINGLY,
# SO THAT THE SCRIPTS RUN WITHOUT DIRECTORY-RELATED PROBLEMS.
# RESTART THE R SESSION BEFORE RUNNING.

# This file combines the measure variables and makes the final (core and full) datasets for main and supplementary analyses

# BEGIN SCRIPT
# Start from an empty workspace so that objects left over from earlier work
# cannot leak into the datasets built below. This only removes the visible
# objects of the current environment; attached packages and options are not
# reset, which is why the header asks for a fresh R session.
rm(list = ls())

# pkgs --------------------------------------------------------------------

# Install each package only when it is missing, then attach it unconditionally.
# `if (!require(pkg)) install.packages(pkg)` would install a missing package
# without attaching it, so the first run on a clean machine would fail as soon
# as a tidyverse function or a magrittr pipe is used.
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
library(tidyverse)
if (!requireNamespace("magrittr", quietly = TRUE)) install.packages("magrittr")
library(magrittr) # provides the `%<>%` compound-assignment pipe used below

# load data ---------------------------------------------------------------

# make sure the .RData files have either been downloaded from Dataverse or been created by running `R_0_1_make_measures.R`

# Load the four measure files from the working directory, in the same order
# as before. Each load() restores the objects stored in the file into the
# current environment (presumably `measure_party`, `measure_voter`,
# `measure_marpor` and `measure_cpds`, which are used below -- TODO confirm).
for (rdata_file in c("RData_measure_party.RData",
                     "RData_measure_voter.RData",
                     "RData_measure_marpor.RData",
                     "RData_measure_cpds.RData")) {
  load(rdata_file)
}
rm(rdata_file) # do not leave the loop variable behind in the workspace

# subset data -------------------------------------------------------------

# Voter measures for the core data: drop every column whose name ends in
# "_den" or "_log" (described in the original note as the pre-transformation
# density variables).
measure_voter_core <- measure_voter %>%
  select(-ends_with("_den"), -ends_with("_log"))

# Party measures for the core data: drop the main-left / main-right only
# columns, i.e. every column whose name ends in "_ml" or in "mr".
# NOTE(review): the second pattern is "mr" without the leading underscore, so
# it also drops any column that merely ends in "mr". It looks like "_mr" was
# intended -- confirm against the column names of `measure_party`.
measure_party_core <- measure_party %>%
  select(-ends_with("_ml"), -ends_with("mr"))

# merge data --------------------------------------------------------------

# Build the full country-election table.
# None of the joins specifies `by`, so dplyr joins on all column names the two
# tables have in common (and prints the keys it picked as a message).
data_full <- measure_voter_core %>%
  full_join(measure_party_core) %>% # keep rows present in either base table
  left_join(measure_marpor) %>%     # add marpor measures
  left_join(measure_cpds) %>%       # add cpds measures
  arrange(countryname, eyear)       # sort by country-year; the lags below rely on this order

# clean data --------------------------------------------------------------

# Full cleaned dataset (all countries, elections from 1945 onwards).
#
# For every column whose name matches "_med", "_rlog", "_mid" or "_dif", add a
# one-row lag within country as a new column named "<column>_lag". Rows are
# lagged in their current order, so this relies on `data_full` being sorted by
# countryname and eyear. The lag is taken BEFORE the year filter so that the
# first retained election of a country can still carry the value of the
# election preceding it.
#
# `mutate(across())` replaces the superseded `mutate_at()`; with a named
# function list the default output names are "<column>_<function name>", i.e.
# the same "<column>_lag" names as before.
data_clean <- data_full %>%
  group_by(countryname) %>%
  mutate(across(
    matches("_med|_rlog|_mid|_dif"),
    list(lag = ~ dplyr::lag(.x, 1))
  )) %>% # lag key vars
  ungroup() %>%
  filter(eyear >= 1945) %>% # post-1945 only
  mutate(
    # identifiers are stored as factors in the final data
    eyear = as.factor(eyear),
    country = as.factor(country),
    countryname = as.factor(countryname)
  )

# core data (20) ----------------------------------------------------------

## core IDs
# The 20 countries that make up the core sample.
IDs.ind <- c("Australia", "Austria", "Belgium", "Canada", "Denmark",
             "Finland", "France", "Germany", "Iceland", "Italy", "Luxembourg",
             "Netherlands", "New Zealand", "Norway", "Portugal",
             "Spain", "Sweden", "Switzerland", "United Kingdom", "United States")

## clean core data
# Same construction as the full cleaned data, restricted to elections from
# 1965 onwards in the core countries. Within-country one-row lags
# ("<column>_lag") are computed on the complete `data_full` first, so the
# first retained election of a country can still carry the value of the
# election preceding it; the lags rely on `data_full` being sorted by
# countryname and eyear.
#
# `mutate(across())` replaces the superseded `mutate_at()`; the generated
# column names are unchanged.
data_core <- data_full %>%
  group_by(countryname) %>%
  mutate(across(
    matches("_med|_rlog|_mid|_dif"),
    list(lag = ~ dplyr::lag(.x, 1))
  )) %>% # lag key vars
  ungroup() %>%
  filter(
    eyear >= 1965,             # post-1965 only
    countryname %in% IDs.ind   # adv. only
  ) %>%
  mutate(
    # converted after filtering, so the factor levels only cover retained rows
    eyear = as.factor(eyear),
    country = as.factor(country),
    countryname = as.factor(countryname)
  )

# core data II (party-level) ----------------------------------------------

## load extra raw data
load("RData_manifesto_mlr.RData")
load("RData_measure_scale.RData")

## make party-level variables
# Adds, for every country-election in `data_core`, the mean left-right
# position of the main-left and main-right parties (`main_left`,
# `main_right`) and their values at the previous election (`*_lag`).
#
# NOTE(review): `leftright` is not created in this script; presumably it comes
# from one of the two files loaded above and is row-aligned with
# `manifesto_mlr`, since it is attached with a plain column bind -- TODO confirm.
data_core2 <-
  manifesto_mlr %>%
  cbind.data.frame(leftright) %>%
  # keep main-left / main-right parties of the core countries only;
  # don't filter by eyear yet (for lags)
  filter(party_mlr %in% c("main left", "main right"), country %in% data_core$country) %>%
  # mean position per country, election and party type
  group_by(country, edate, party_mlr) %>%
  mutate(leftright_mean = mean(leftright, na.rm = TRUE)) %>%
  ungroup() %>%
  # one column per party type; no id_cols are given, so every remaining column
  # still identifies rows and the result is not yet one row per election
  pivot_wider(names_from = party_mlr, values_from = leftright_mean) %>%
  select(countryname, edate, main_left = `main left`, main_right = `main right`) %>%
  # collapse to the distinct non-missing value per country-election
  # NOTE(review): if a country-election has no non-missing value for one of the
  # two party types, unique(na.omit(...)) has length 0 and summarise() does not
  # return exactly one row for that group -- confirm this cannot occur.
  group_by(countryname, edate) %>%
  summarise(main_left = unique(na.omit(main_left)), main_right = unique(na.omit(main_right))) %>%
  # previous-election values within country; `dplyr::lag` is spelled out, as
  # everywhere else in this script, so that `stats::lag` can never be picked up
  group_by(countryname) %>%
  mutate(main_left_lag = dplyr::lag(main_left), main_right_lag = dplyr::lag(main_right)) %>%
  ungroup() %>%
  # attach to the core data, keeping every row of `data_core`
  left_join(data_core, ., by = c("countryname", "edate"))

## make new lhs & rhs variables
# Party-level outcome (lhs) and explanatory (rhs) variables:
#   lhs_*: change in the party's position since the previous election
#   rhs_*: the party's previous position minus the lagged `leftright_med`
# followed by the within-country one-row lag of each of the four variables.
data_core2 <- data_core2 %>%
  mutate(
    lhs_ml = main_left - main_left_lag,
    lhs_mr = main_right - main_right_lag,
    rhs_ml = main_left_lag - leftright_med_lag,
    rhs_mr = main_right_lag - leftright_med_lag
  ) %>%
  group_by(country) %>%
  mutate(across(
    c(lhs_ml, lhs_mr, rhs_ml, rhs_mr),
    ~ dplyr::lag(.x),
    .names = "{.col}_lag" # lhs_ml_lag, lhs_mr_lag, rhs_ml_lag, rhs_mr_lag
  )) %>%
  ungroup()

# save data ---------------------------------------------------------------

# Write each final dataset to its own .RData file (paths are relative to the
# working directory). Note that the object stored in "RData_data_final.RData"
# is named `data_clean`.
save(list = "data_clean", file = "RData_data_final.RData")
save(list = "data_core", file = "RData_data_core.RData")
save(list = "data_core2", file = "RData_data_core2.RData")

# clean env. --------------------------------------------------------------
# Remove every visible object created by this script from the workspace.
rm(list = ls())

# END SCRIPT